/*****************************************************************************
This program calculates pscores for general and screening risk
*	at the student x program level by:
*		(i) 	identifying marginal applicants at each program
*		(ii) 	determining program cutoffs
*		(iii)	computing IK bandwidths for screened schools
*		(iv)	defining big thetas
*		(v)		calculating MIDs
*	This version uses simulated lottery numbers for those with missing lottery
*	numbers, as well as simulated offers.
*	----------------------------------------------------------------------------
*	inputs: 		match_file_for_pscore_with_sim_lotteries{yyyy}_new.dta
*						> output from A_clean_match_file
*					*data_for_bw_{yyyy}{grad}{suffix}.dta
*						> file to reload after running B1
*					**bw_for_schools_min_{yyyy}{suffix}.dta
*						> bandwidths produced by B1
*	dependencies:	B1_run_ik.do
*						> computing IK bandwidths
*					B2_create_runvars.do
*						> create running variables
*	----------------------------------------------------------------------------
*	intmd. outputs: *data_for_ik_{yyyy}{grad}{suffix}.dta
*						> save for IK bandwidth script
*					program_pscore{mod}{suffix}_{yyyy}_before_theta{grad}.dta
*						> file with first three primitives (before Theta assignment)
*	outputs: 		program_pscore{mod}{suffix}_{yyyy}{grad}.dta
*						> then feed into B2
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

* Syntax for program
capture program drop pscores_mdrd2_ms

program define pscores_mdrd2_ms

syntax , run_bw(real ) run_runvars(real) [suffix(str) insuffix(str) min_10_inbw(real 0) ///
dup_threshold(str ) delta_threshold(str )  half_bw(real 0)  double_bw(real 0)  grad_sep(real 0) bw(str)]

	clear

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/*****************************************************************************\
 We need the following primitives:

	i. 		marginal priority
	ii. 	cutoff
	iii.	bandwidth
	iv.		big thetas / t
	v.		MID
	vi. 	pscores

\******************************************************************************/

/*	 We have a loop that allows to run the entire program with different bandwidths
	 by outcome. We decided for MDRD2 to instead take the minimum bandwidth across
	 SAT Math, SAT EBRW, and Graduation as a conservative measure.
	 We still leave this option in here to allow to turn this feature back on in
	 case this is ever needed.  */

* Loop over test-score vs. graduation version of bandwidth

* Not using modifiedd rank/choice, etc

if `grad_sep' == 1 local runs 0 1
if `grad_sep' == 0 local runs 0

foreach grad in `runs' {

	if `grad' == 0  {
		local grad_flag = ""
		local years 2016 2017 2018
	}

	if `grad' == 1 {
		local grad_flag = "grad"
		local years
	}

	* Loop over years
	foreach year of local years {

		global year "`year'"

		use "${cleandata}match_file_for_pscore_with_sim_lotteries_ms_`year'_new_sped`insuffix'.dta", clear
		replace programcode = programcode + "_SPED"
		replace ourmatch = ourmatch + "_SPED"
		append using "${cleandata}match_file_for_pscore_with_sim_lotteries_ms_`year'_new_nonsped`insuffix'.dta"

		* Making sure lottery numbers are unique
		* Lottery rank mod is zero in some cases
		preserve
			keep stu lottery_rank
			collapse (max) lottery_rank, by(stu)
			unique lottery_rank
			assert `r(sum)' == `r(N)'
		restore

		* Generate indicator for programs we consider to use the lottery number
		gen description = programtype
		gen lottery = inlist(description,"Unscreened","Zoned","Limited Unscreened","Charter")
		gen student_rank = lottery_rank
		sum student_rank

		* Replace offer with simulated offer
		gen simoffer = ourmatch == programcode
		rename offer offer_orig
		gen offer = simoffer

		* Generate an indicator for offers at higher ranked schools
		sort stu choice
		by stu : gen higher_offer_sim	 = 0 if _n == 1
		by stu : replace higher_offer_sim = max(offer[_n-1] , higher_offer_sim[_n-1] ) if _n > 1

	* ----------------------------------------------------------------------------
	* i. marginal priority
	* ----------------------------------------------------------------------------

		* Generate marginal priority (priority group with last offer)
		bys programcode: egen marginal_priority = max(global_priority * offer)
		format marginal_priority %12.0g

		* Generate marginal indicator
		gen marginal = global_priority == marginal_priority

		* Generate offer count by program
		bys programcode global_prio: egen prog_count = sum(offer)

		* Generate indicator for missing ranks
		gen rank_orig = rank
		gen indi_missing_rank = (rank == . & lottery == 0)

		* Replace missing RVs with max x 1000
		sum rank
		replace rank = r(max) * 1000 if indi_missing_rank == 1
		scalar scalar_missing_rank = r(max) * 1000

		* Rerank robustness check (no gaps in running variables)
		if "`suffix'" == "_rerank" {

			preserve
				* We drop duplicates in RVs, so that we can keep the cases where
				* there is mass at the cutoff
				keep programcode global_prio rank

				duplicates drop

				* sorting as we would do to simulate da
				sort programcode global_prio rank

				* preserve this ranking and generate one variable that preserve the ordering
				by programcode  global_prio: gen reranked = _n
				tempfile reranked
				sa `reranked'
			restore

			merge m:1 programcode global_prio rank using `reranked', nogen
			sort programcode global_prio rank

			replace rank = reranked
		}
		* Rescaling runvars to (0,1], as described in the paper
		* Notice that we do that within the marginal group only
		bys programcode: egen runvar_max =  max(rank ) if lottery == 0 & marginal == 1 & indi_missing_rank == 0
		bys programcode: egen runvar_min =  min(rank ) if lottery == 0 & marginal == 1 & indi_missing_rank == 0

		gen rank_no_rescale = rank

		replace rank = ( rank_no_rescale - runvar_min+1) / (runvar_max -  runvar_min+1) if (runvar_max -  runvar_min != 0)

		replace rank = 1 if (runvar_max -  runvar_min == 0)

		replace rank = 99 if indi_missing_rank == 1

		* assert re-scaling was successful
		su rank if indi_missing_rank == 0
		assert `r(max)' <= 1 & `r(min)' > 0

	* Get ranks for composite scores and test outcomes

		foreach var in compositescore testoutcome{

			* Generate indicator for missing ranks
			gen `var'_orig = `var'
			gen indi_missing_`var' = (`var' == . & lottery == 0)

			* Replace missing RVs with max x 1000
			qui su `var'
			replace `var' = r(max) * 1000 if indi_missing_`var' == 1
			scalar scalar_missing_`var' = r(max) * 1000

			* Rescaling runvars to (0,1], as described in the paper
			* Notice that we do that within the marginal group only
			bys programcode: egen `var'_max =  max(`var' ) if lottery == 0 & marginal == 1 & indi_missing_`var' == 0
			bys programcode: egen `var'_min =  min(`var' ) if lottery == 0 & marginal == 1 & indi_missing_`var' == 0

			gen `var'_no_rescale = `var'

			replace `var' = (`var'_no_rescale - `var'_min+1) / (`var'_max -  `var'_min+1) if (`var'_max -  `var'_min != 0)

			replace `var' = 1 if (`var'_max - `var'_min == 0)

			replace `var' = 99 if indi_missing_`var' == 1

			* Invert test scores so that a lower value -> higher ranked
			replace `var' = 1-`var' if `var' != 99 & `var' != 1
			replace `var' = .000000001 if `var' == 1 & !(`var'_max - `var'_min == 0)

			* assert re-scaling was successful
			su `var' if indi_missing_`var' == 0
			assert `r(max)' <= 1 & `r(min)' > 0

		}

		* Combine rank variable
		replace rank = compositescore if programtype == "False Test Outcome"
		replace rank = testoutcome if programtype == "Test Outcome"

	* ----------------------------------------------------------------------------
	* ii. cutoffs
	* ----------------------------------------------------------------------------

		* Set cutoff as the last *marginal* student who gets an offer (with max lottery or rank)
		bys programcode: egen double lottery_cutoff  = max( student_rank * offer * marginal * lottery)

		* Set rank cutoff
		bys programcode: egen double rank_cutoff  = max( rank * offer * marginal * !lottery)
		format rank_cutoff %12.0g

		* Generate temporary lottery flag that includes cases of missing runvar
		gen lottery_flag_temp = lottery
		replace lottery_flag_temp = 1 if rank_cutoff == 99

		**********************************************
		****** MODIFYING LOTTERY FLAG
		**********************************************

	/*	Certain programs have multiple applicants who share the same rank value
		(mostly missing or 1) and are at the cutoff. In such cases, the lottery
		number is the actual tie-breaker but often such programs are still marked
		as screened. We are going to identify these programs as programs where
			- missing bandwidth
			- more than one kid at the rank cutoff
			- for kids who have rank == rank_cutoff, there is variation in offer
			- the program is a non lottery program. */

		* Generate variable that indicates variation in offers cutoff
		bys programcode : egen var_in_offer_at_cutoff = sd(offer ) if  rank == rank_cutoff & marginal == 1 & higher_offer_sim == 0
		bysort programcode: egen max_var_in_offer_at_cutoff = max(var_in_offer_at_cutoff)
		drop var_in_offer_at_cutoff
		rename max_var_in_offer_at_cutoff var_in_offer_at_cutoff

		* Generate variable that indicates number of applicants at cutoff
		bys programcode  global_prio rank: gen apps_at_cutoff = _N if  rank == rank_cutoff & marginal == 1 & higher_offer_sim == 0
		bysort programcode: egen max_apps_at_cutoff = max(apps_at_cutoff)
		drop apps_at_cutoff
		rename max_apps_at_cutoff apps_at_cutoff

		* Generate variable indicating screened program that used lottery number
		* as tie-breaker
		bys programcode: egen screened_uses_lottery_part1 = max(  (apps_at_cutoff > 1) & (apps_at_cutoff !=. )  & ///
		( var_in_offer_at_cutoff > 0 ) & (var_in_offer_at_cutoff !=. ) & (lottery_flag_temp == 0 ) )    // this indicates screened programs that ultimately have to use lottery numbers

		/* Gen modified lottery flag to account for the edge cases caught by screened_uses_lottery
		We are then going to use this modified flag for the pscore calculations and MID calculations below.*/
		gen lottery_flag = lottery
		replace lottery_flag = 1 if screened_uses_lottery_part1 == 1
		replace lottery_flag = 1 if lottery_flag_temp == 1 & lottery == 0

		* Generate indicator for screened programs breaking ties with lottery number
		gen screened_uses_lottery = screened_uses_lottery_part1 == 1 | (lottery_flag_temp == 1 & lottery == 0)

		* Generate lottery cutoff for  programs that are screened but use lottery
		bys programcode: egen double lottery_cutoff_screened  = max( student_rank * offer * marginal * ( rank == rank_cutoff) * screened_uses_lottery)  if screened_uses_lottery == 1

		* Update lottery cutoff
		replace lottery_cutoff = lottery_cutoff_screened  if screened_uses_lottery == 1

	* ----------------------------------------------------------------------------
	* iii. Bandwidth
	* ----------------------------------------------------------------------------

		* Generate indicator variables for programs using rank variable to break ties
		egen non_lotto_programcode = group( programcode ) if lottery_flag == 0

		* Generate centered rank variable (cutoff = 0)
		gen hs_rank_centered  = rank - rank_cutoff

		* Generate variable that checks for marginal applicants above the cutoff
		bys programcode: egen fully_ranked_prog = max( marginal * (hs_rank_centered > 0 ) * !lottery_flag_temp)
		la var fully_ranked_prog "flag if non-lottery program had marginal students with hs_rank_centered > 0 i.e. students ranked above the cutoff "

		/* 	Compute bandwidths in separate program because it takes a lot of time.
			This separate program takes the file saved below, does the IK calculation,
			then outputs a file which we merge below. */

		* Run this program only once and use output for the other outcomes
		if `run_bw' == 1 & `grad' == 0 {
			save "${cleandata}data_for_bw_ms_`year'`grad_flag'`suffix'.dta", replace
			do "${code_clean_NYCms}pscores_mdrd2_bw.do" `grad_sep' "`suffix'"
			use "${cleandata}data_for_bw_ms_`year'`grad_flag'`suffix'.dta", clear
		}

		* Merge back in
		merge m:1 programcode using "${cleandata}bw_for_schools_min_ms_`year'_`grad_sep'`suffix'.dta", assert(1 3) nogen

		* implement selected bandwidth
		gen bw = `bw'

		* Replace bandwidth for Graduation version if activated
		if `grad' == 1 {
			drop bw
			gen bw = `grad_flag'`bw'
		}

		* Set BW to missing if lottery school
		replace bw = . if lottery_flag == 1

			 sort stu choice

			* Generate cutoff distance
			gen cutoff_distance = abs(hs_rank_centered) ///
					if lottery_flag == 0 & (marginal == 1)

			*sort stu programcode choice_aug
			sort stu programcode choice

			* Generate minimum cutoff distance for student x program combination
			by stu programcode: egen min_cutoff_dist = min(cutoff_dist)

			gen indi_min_cutoff_dist = min_cutoff_dist == cutoff_dis ///
					if cutoff_dis != . & min_cutoff_dist !=.

			* Generate indicators for applicants in/above/below the bandwidth
			/*	Note that we do this twice due to the fact that we limit risk to programs
				where at least 5 applicants are on either side of the cutoff within
				the bandwidth.
				Hence, after generating the indicators, we find the number of students in the
				bandwidth, then set the bandwidth to missing for programs with fewer than
				5 applicants on either side of the cutoff within the bandwidth, and then
				recalculate whether an applicant is in the bandwidth */

			* Gen indicator for being in the bandwidth (removed if edopt == 0)
			gen in_bw =  hs_rank_centered > -bw &  hs_rank_centered <= bw & (marginal == 1) & !missing(bw)  ///
				if lottery_flag == 0

			* Gen indicator for being below the bandwidth (removed if edopt == 0)
			gen below_bw = hs_rank_centered <= -bw 		& (marginal == 1) & !missing(bw) if lottery_flag == 0

			* Gen indicator for being above the bandwidth (removed if edopt == 0)
			gen above_bw = hs_rank_centered >  bw 		& (marginal == 1) & !missing(bw) if lottery_flag == 0

			* Generate check that each applicant is at max in/below/above the bandwidth
			egen check = rowtotal(in_bw below_bw above_bw) if marginal == 1 & lottery_flag == 0 & !missing(bw)
			sum check

			assert `r(max)' == 1 & `r(min)' == 1
			drop check

			* Tag if the program has a bandwidth. This effectively tags which programs are screened.
			gen has_bw = bw != .

		*** Implement bandwidth population criterion
			* Set BW to Missing for programs where less than five kids are on one
			* or both sides of the cutoff within the bandwidth

			* Number of applicants in bandwidth
			bysort programcode: egen no_in_bw = total(in_bw)

			* Number of applicants in bandwidth above cutoff
			bysort programcode: egen no_in_bw_above = total(in_bw) if hs_rank_centered > 0 & hs_rank_centered!=.
			* Number of applicants in bandwidth below cutoff
			bysort programcode: egen no_in_bw_below = total(in_bw) if hs_rank_centered <= 0 & hs_rank_centered!=.
			bysort programcode: egen max_no_in_bw_above = max(no_in_bw_above)
			bysort programcode: egen max_no_in_bw_below = max(no_in_bw_below)
			replace no_in_bw_above =  max_no_in_bw_above
			replace no_in_bw_below =  max_no_in_bw_below

			* Generate indicator for programs with less than 5 on either side of the
			* cutoff within the BW
			bysort programcode: gen no_in_bw_below_10 = no_in_bw_above < 5 | no_in_bw_below < 5

			* save preliminary dataset before setting bandwidths to zero
			sa "${cleandata}`bw'_pre_drop_ms_`year'`grad_flag'", replace

			* Replace Bandwidth to missing for such programs (no new risk)
			replace bw = . if no_in_bw_below_10 == 1

			* Drop indicators
			drop max_no_in_bw_above max_no_in_bw_below no_in_bw_below no_in_bw_above

		*** Truncate the bandwidth when it is much larger on one side than the other
			* (because it is close to the top or bottom of the priority)

			* First, generate minimum and maximum value (in absolute value) of rank in the
			* bandwidth
			sort programcode
			by programcode : egen max_bw_val = max(hs_rank_centered) if in_bw == 1
			assert max_bw_val >= 0
			by programcode : egen min_bw_val = min(hs_rank_centered) if in_bw == 1
			by programcode : gen abs_min_bw_val = abs(min_bw_val)

			* Generate variable that takes minimum absolute value across the left-most
			* and right-most extremes of the bw.
			gen bw_mod_temp = min(max_bw_val, abs_min_bw_val)
			by programcode : egen bw_mod = min(bw_mod_temp)

			* Summary stats of the scale of the difference between the two bandwidths
			/*egen tag = tag(prog_id_augmented)
			count if bw != bw_mod
			gen diff_bw = bw - bw_mod
			su diff_bw if tag == 1 */

			* Replace bandwidth with the smallest side of the bandwidth
			assert !mi(bw_mod) if !mi(bw)
			replace bw = bw_mod

			* Drop irrelevant variables
			drop bw_mod_temp bw_mod max_bw_val min_bw_val abs_min_bw_val

		*** Re-do the in/above/below bandwidth indicators after implementing the count (removed if edopt == 0 from gen *bw lines)
			drop in_bw below_bw above_bw

			gen in_bw =   hs_rank_centered > -bw &  hs_rank_centered <= bw & (marginal == 1) & !missing(bw)  ///
				if lottery_flag == 0
			*replace in_bw = hs_rank_centered > -bw &  hs_rank_centered <= bw  & (marginal == 1) & !missing(bw) & indi_min_cutoff_dist == 1 if lottery_flag == 0  & edopt == 1

			gen below_bw = hs_rank_centered <= -bw 		& (marginal == 1) & !missing(bw) if lottery_flag == 0
			*replace below_bw = hs_rank_centered <= 0	  	& (marginal == 1) & !missing(bw) if lottery_flag == 0 & edopt == 1 & in_bw == 0

			gen above_bw = hs_rank_centered >  bw 		& (marginal == 1) & !missing(bw) if lottery_flag == 0
			*replace above_bw = hs_rank_centered >  0 	 	& (marginal == 1) & !missing(bw) if lottery_flag == 0 & edopt == 1 & in_bw == 0

			egen check = rowtotal(in_bw below_bw above_bw) if marginal == 1 & lottery_flag == 0 & !missing(bw)
			sum check
			assert `r(max)' == 1 & `r(min)' == 1
			drop check

		* Re-tag if the program has a bw. This effectively tags which programs are screened.
		replace has_bw = bw != .

		* Save an intermediary file before creating the Mdrd2 relevant indicators
		save "${cleandata}program_pscore_`bw'`modification_str'`suffix'_ms_`year'_before_theta`grad_flag'.dta", replace

		*** Generate variables for robustness checks
			* Duplicates, gaps and number of applicants in BW

		* 	1. Computing duplicates
			duplicates tag programcode global_priority rank, gen(rvtag_duplicats)

		*	2. Compute threshold for gaps in running varibale
			* Sort
			sort programcode global_priority rank_orig
			* Generate variable for number of steps from last rank
			by programcode global_priority: gen delta_rv_marg_in_bw = rank_orig[_n] - rank_orig[_n-1] ///
				if lottery_flag == 0  & marginal == 1 & in_bw == 1

			local modification_str
			if "`dup_threshold'" != "" {
				* Indicator for duplicates in bandwidth larger than threshold
				bys programcode: egen dup_problem = max((rvtag_duplicats > (`dup_threshold' - 1) ) & (in_bw == 1) & lottery_flag == 0 & marginal == 1)
				local modification_str "`modification_str'_`dup_threshold'dup"
				local modification_vars "dup_problem"
			}
			if "`delta_threshold'" != "" {
				* Indicator for gap larger than the treshold
				bys programcode: egen delta_problem = max(delta_rv_marg_in_bw >  `delta_threshold' & delta_rv_marg_in_bw != . )
				local modification_str "`modification_str'_`delta_threshold'delta"
			}
			drop delta_rv_marg_in_bw

		local modification_vars

		*	3. flag these cases
			if "`dup_threshold'" != "" &  "`delta_threshold'" == ""  {

				local modification_vars "dup_problem"

			}
			else if "`dup_threshold'" == "" &  "`delta_threshold'" != "" {
				local modification_vars "delta_problem"

			}
			else if "`dup_threshold'" != "" &  "`delta_threshold'" != "" {
				local modification_vars "dup_problem | delta_problem"

			}

		*	4. Modify bandwidths

			if "`dup_threshold'" != "" | "`delta_threshold'" != "" | `double_bw' == 1 | `half_bw' == 1  {

				* Final dummy for Either Delta or Duplicate Issue
				if "`dup_threshold'" != "" | "`delta_threshold'" != "" {
					bys programcode: egen cont_problem = max(`modification_vars')
					** replacing the IK bandwidth
					replace bw = . if cont_problem == 1
				}

				if `double_bw' == 1 {
					gen double_bw = bw * 2
					replace bw = double_bw
					local modification_str "_doublebw"
				}

				if `half_bw' == 1 {
					gen half_bw = bw / 2
					replace bw = half_bw
					local modification_str "_halfbw"
				}

			* Replacing the in/below/above bandwidth indicators after modifying (removed if edopt == 0 from gen *bw lines)
			drop in_bw below_bw above_bw

			gen in_bw = 	hs_rank_centered > 	-bw & hs_rank_centered <= bw & (marginal == 1) & !missing(bw) if lottery_flag == 0
			gen below_bw = 	hs_rank_centered <= -bw & (marginal == 1) & !missing(bw) if lottery_flag == 0
			gen above_bw = 	hs_rank_centered >   bw & (marginal == 1) & !missing(bw) if lottery_flag == 0

			egen check = rowtotal(in_bw below_bw above_bw) if marginal == 1 & lottery_flag == 0 & !missing(bw)
			sum check
			assert `r(max)' == 1 & `r(min)' == 1
			drop check

			* Re-tag if the program has a bw. This effectively tags which programs are screened.
			replace has_bw = bw != .
		}

	* ----------------------------------------------------------------------------
	* iv. Get t's
	* ----------------------------------------------------------------------------

		*	Always seated
			* Applicant clears marginal priority (Theta^a)
			* or is marginal but below the bandwidth at screened program or below or
			* at the cutoff at screened program where we couldn't get a
			* bandwidth or below the cutoff at screened using lottery program
			gen t_a = (marginal_priority > global_priority) ///
				| (lottery_flag == 0 & marginal == 1 & below_bw == 1 ) ///
				| (lottery_flag == 0 & marginal == 1 & hs_rank_centered <= 0 & missing(bw) ) ///
				| (lottery_flag == 1 & marginal == 1 & (screened_uses_lottery == 1 ) & (hs_rank_centered < 0)  )

		*	Never seated
			* Applicant fails to clear marginal priority (Theta^n)
			* or is marginal but above the bandwidth at screened program or above
			* the cutoff at screened program where we couldn't get a
			* bandwidth or above the cutoff at screened using lottery program
			gen t_n =  (marginal_priority < global_priority ) ///
				| (lottery_flag == 0 & marginal == 1 & hs_rank_centered > 0 & missing(bw) )   ///
				| (lottery_flag == 0 & marginal == 1 & above_bw == 1 )   ///
				| (lottery_flag == 1 & marginal == 1 & (screened_uses_lottery == 1 ) & (hs_rank_centered > 0)  )

		*	Conditionally seated
			* Applicant is marginal and in bandwidth at screened program
			* or marginal at a lottery school or marginal and at the cutoff at a
			* screened using lottery school
			gen t_c = 	(lottery_flag == 0 & marginal_priority == global_priority & in_bw == 1)   ///
				| (lottery_flag == 1 & marginal_priority == global_priority & (screened_uses_lottery == 0 )  ) ///
				| (lottery_flag == 1 & marginal_priority == global_priority & (screened_uses_lottery == 1 ) & (hs_rank_centered == 0)  )


		* Check we partition the set of applicants
		egen check = rowtotal(t_?)
		su check
		assert `r(min)' == 1 & `r(max)' == 1
		drop check

		* Perform several checks

		* No offers if t_n == 1
		su offer if t_n == 1
		assert `r(max)' == 0
		* Offer if no higher offers and t_a == 1
		su offer if t_a == 1 & higher_offer_sim == 0
		assert `r(min)' == 1
		* Offer if t_c == 1 & no higher offers & lottery number clears cutoff
		su offer if (t_c == 1) & (higher_offer_sim == 0) & (lottery_flag == 1) & (lottery_cutoff > student_rank)
		assert `r(min)' == 1
		* No offer if t_c == 1 & no higher offers & lottery number fails to clear cutoff
		su offer if (t_c == 1) & (higher_offer_sim == 0) & (lottery_flag == 1) & (lottery_cutoff < student_rank)
		assert `r(max)' == 0
		* Offer if t_c == 1 & no higher offers & rank clears cutoff
		su offer if (t_c == 1) & (higher_offer_sim == 0) & (lottery_flag == 0) & (hs_rank_centered <= 0 )
		assert `r(min)' == 1
		* No offer if t_c == 1 & no higher offers & rank fails to clear cutoff
		su offer if (t_c == 1) & (higher_offer_sim == 0) & (lottery_flag == 0) & (hs_rank_centered > 0 )
		assert `r(max)' == 0

		* Code t's for screening risk pscore. These will treat the lottery numbers as fixed.
		* Hence, an applicant can't be marginal at a lottery school, only t=a or t=n

		*	Always seated
			gen bw_t_a = (marginal_priority > global_priority) ///
				| (lottery_flag == 0 & marginal == 1 & below_bw == 1 & screened_uses_lottery == 0 ) ///
				| (lottery_flag == 0 & marginal == 1 & hs_rank_centered <= 0 & missing(bw) )  ///
				| (lottery_flag == 1 & marginal == 1 & student_rank <= lottery_cutoff  & (screened_uses_lottery == 0 ))  ///
				| ((lottery_flag == 1 ) & (marginal == 1 ) & (student_rank <= lottery_cutoff) & (screened_uses_lottery == 1 ) & (hs_rank_centered == 0)) ///
				| ((lottery_flag == 1 ) & (marginal == 1 ) & (hs_rank_centered < 0) & (screened_uses_lottery == 1 ) )

		*	Never seated
			gen bw_t_n = (marginal_priority < global_priority)  ///
				| (lottery_flag == 0 & marginal == 1 & above_bw == 1 & screened_uses_lottery == 0 ) ///
				| ((lottery_flag == 0 ) & (marginal == 1 ) & ( hs_rank_centered > 0 ) & missing(bw) ) ///
				| ((lottery_flag == 1 ) & (marginal == 1 ) & (student_rank > lottery_cutoff) & (screened_uses_lottery == 0 )) /// Need to break out the screened programs that use lotteries
				| ((lottery_flag == 1 ) & (marginal == 1 ) & (student_rank > lottery_cutoff) & (screened_uses_lottery == 1 ) & (hs_rank_centered == 0)) ///
				| ((lottery_flag == 1 ) & (marginal == 1 ) & (hs_rank_centered > 0) & (screened_uses_lottery == 1 ) )

		* 	Conditionally seated
			gen bw_t_c =  ( (lottery_flag == 0 ) & (marginal_priority == global_priority) & (in_bw == 1) )

		* Check that t's partion all applicants
		egen check = rowtotal(bw_t_?)
		su check

		assert `r(min)' == 1 & `r(max)' == 1
		drop check

	* ----------------------------------------------------------------------------
	* v. MID
	* ----------------------------------------------------------------------------

		* 	case 1: ever get more preferred
			* Applicant is ever in t=a at any schools above school s
			* (for screened schools you are marginal at the school but are below the left hand window of the bandwidth)
			sort stu choice
			gen ever_seated_more_preferred = 0
			la var ever_seated_more_preferred " 1 if you ever cleared marginal priority at a more preferred school"
			by stu: replace ever_seated_more_preferred =  max(ever_seated_more_preferred[_n-1 ], t_a[ _n-1 ] ) if _n > 1  //maximum so that if its ever 1 the following chain will be 1.

		* 	case 2: never get more preferred
			* Applicant is always in t=n at any higher ranked school
			sort stu choice
			* By convention mid is 0 at first choice, because applicant can never get a better choice
			gen never_get_more_preferred = 1
			la var never_get_more_preferred "Student never clears marginal priority at more preferred schools"
			by stu: replace never_get_more_preferred =  min(never_get_more_preferred[_n-1 ], t_n[ _n-1 ] ) if _n > 1

		* 	case 3: conditionally get more preferred
			* Applicant is in t=c in at least one more preferred school, but never t=a

			* Check if applicant is ever marginal at a more preferred school
			sort stu choice
			gen ever_marginal_more_preferred = 0
			by stu: replace ever_marginal_more_preferred = max(ever_marginal_more_preferred[_n-1 ], t_c[ _n-1 ] ) if _n > 1

			* Check if applicant is always either t_c or t_n at more preferred schools (never t_a)?
			gen either_t_cn = max(t_c, t_n)
			gen always_t_cn_more_preferred = 1
			by stu: replace always_t_cn_more_preferred =  min(always_t_cn_more_preferred[_n-1 ], either_t_cn[ _n-1 ] ) if _n > 1

			* Check if applicant is t_a but with at least one marginal school? (non-degenerate better set risk)
			gen sometimes_get_more_preferred = always_t_cn_more_preferred == 1 & ever_marginal_more_preferred == 1
			la var sometimes_get_more_preferred "Not guaranteed a spot at a higher rank school, but at least marginal in a more preferred school"

		* Check that these definitions partition the set of applicants
		egen check = rowtotal(sometimes_get_more_preferred  ever_seated_more_preferred never_get_more_preferred)
		su check
		assert `r(max)' == 1 & `r(min)'  == 1
		drop check

		***	Isolating screening risk
			* we need an equivalent version of ever_seated_more_preferred that only uses the bw
			* variation i.e. the bw_big_thetas. Since we aren't actually going to calculate
			* MID we don't need the other constructs.

		*	case 1
			* Applicant is ever in t=a at any schools above school s
			sort stu choice
			gen bw_ever_seated_more_preferred = 0
			by stu: replace bw_ever_seated_more_preferred =  max(bw_ever_seated_more_preferred[_n-1 ], bw_t_a[ _n-1 ] ) if _n > 1


		***	MID computation
			* MID boils down to risk generated by lottery schools in the better set

			* Initialize at missing. Should not have missing for lottery applications
			gen double mid = .

		* case 1
			* set to 0 if applicant is always in t_n at more preferred lottery schools
			replace mid = 0 if never_get_more_preferred == 1

		* case 2
			* set to 1 if applicant is ever t_a at a more preferred lottery school
			* (explicitly restricting to lottery schools doesn't change p-score calculation
			* since risk will be degenerate anyways)
			replace mid = 1 if ever_seated_more_preferred == 1

		* case 3
			* non-degenerate better set risk
			sort  stu choice
			* only consider the lagged cutoff if applicant is t_c at that lottery school.
			by  stu: gen lagged_lottery_cutoff = lottery_cutoff[_n - 1 ] * t_c[_n-1] * lottery_flag[_n-1]

			by  stu: replace mid = max(mid[_n - 1], lagged_lottery_cutoff )  if sometimes_get_more_preferred == 1
			* Replace first lottery choice to zero.
			by  stu: replace mid = 0  if _n == 1

			* Fill in mid for non lottery schools, we need this to calculate the pscores
			sort  stu choice
			by stu: replace mid = 0 if _n ==1
			by stu: replace mid = mid[_n-1] if _n > 1 & mid == .

	* ----------------------------------------------------------------------------
	* vi. p-scores
	* ----------------------------------------------------------------------------

		sort stu choice

		* Local score with single non-stochastic tie-breaking
		gen double pscore_rank = 0 if !lottery_flag & ( t_n == 1 | ever_seated_more_preferred == 1 )
		replace pscore_rank = 1 if !lottery_flag & ( t_a == 1 &  ever_seated_more_preferred == 0 )
		replace pscore_rank = 0.5 if !lottery_flag & ( t_c == 1 &  ever_seated_more_preferred == 0 )

		* Solve running count problem (little m)
		* We need to know how many times an applicant has pscore_rank == 0.5 at more preferred schools.
		* pscore_rank == 1 is already taken care of with ever_seated_more_preferred
		sort  stu choice
		by stu: gen number_of_bw = 0 if _n == 1
		by stu:	replace number_of_bw = number_of_bw[_n-1] + (( pscore_rank[_n - 1 ] == 0.5) * (lottery_flag[_n-1] == 0 ))   if _n > 1


		sort stu choice
		su pscore

		*** Code local general risk pscore

			gen double pscore = .

		* 	Degenerate case
			replace pscore = 0 if t_n == 1 | ever_seated_more_preferred == 1

			gen double one_minus_mid = 1 - mid
			la var one_minus_mid " this is lottery number truncation"

		*	Better set risk only
			replace pscore = 1 if t_a == 1 & ever_seated_more_preferred == 0
			replace pscore =  one_minus_mid * 0.5^(number_of_bw) ///
				if t_a == 1 & ever_seated_more_preferred == 0

		*	Lottery school with risk at s
			replace pscore =  one_minus_mid * 0.5^(number_of_bw) * max(0, ( lottery_cutoff - mid) / one_minus_mid  ) ///
				if t_c == 1 & ever_seated_more_preferred == 0 & lottery_flag == 1

		*	Screened school with risk at s
			replace pscore =  one_minus_mid * 0.5^(number_of_bw) * pscore_rank ///
				if t_c == 1 & ever_seated_more_preferred == 0 & lottery_flag == 0

			count if pscore == .
			assert `r(N)' ==  0

		* Rename this pscore_form
		ren pscore pscore_form

		***	 Isolate screening risk
			* (based solely on the variation within the cutoff)

		gen double pscore_bw = .

		*	Degenerate case
			replace pscore_bw = 0 if bw_t_n == 1 | bw_ever_seated_more_preferred == 1

		*	Better set risk only
			replace pscore_bw = 1 if bw_t_a == 1 & bw_ever_seated_more_preferred == 0
			replace pscore_bw =  0.5^(number_of_bw) ///
				if bw_t_a == 1 & bw_ever_seated_more_preferred == 0

		*	Screened school with risk at s
			replace pscore_bw =  0.5^(number_of_bw) * pscore_rank ///
				if bw_t_c == 1 & bw_ever_seated_more_preferred == 0 & lottery_flag == 0

			count if pscore_bw == .
			assert `r(N)' ==  0

		ren pscore_bw pscore_form_bw

		* Make pscore which is based solely on the variation around the cutoff, with no disqualification risk
		gen double pscore_qbw = 0
		replace pscore_qbw = 0.5 if bw_t_c == 1

		* Compute frequency score
		bys programcode mid t_a t_c t_n : egen double pscore_freq = mean(offer)

		* For applicants who are in t_n, mid doesn't matter.
		bys programcode t_n: egen double frequency_offer_intermediate = mean(offer)

		replace pscore_freq = frequency_offer_intermediate if t_n == 1
		drop frequency_offer_intermediate

		* Get screening risk frequency score
		bys programcode mid bw_t_a bw_t_c bw_t_n : egen double pscore_freq_bw = mean(offer)
		* Again, for applicants who are in t_n , mid doesn't matter.
		bys programcode bw_t_n: egen double frequency_offer_intermediate = mean(offer)

		replace pscore_freq_bw = frequency_offer_intermediate if t_n == 1
		drop frequency_offer_intermediate

		* Generate Indicator for offer if pscore was 0
		bys stu : egen ever_0_got_offer = max( offer == 1 & pscore_form_bw == 0)

		qui compress

		* Save
		save "${cleandata}program_pscore_`bw'`modification_str'`suffix'_ms_`year'`grad_flag'.dta", replace

		* Compute the running variable controls
		if `run_runvars' == 1 do "${code_clean_NYCms}pscores_mdrd2_rv.do" "program_pscore_`bw'`modification_str'`suffix'_ms_`year'`grad_flag'.dta"

	}
}

end
